{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas\n",
    "import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "files = glob.glob(\"results_70b/*.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs = []\n",
    "for file in files:\n",
    "    df = pandas.read_csv(file)\n",
    "    df[\"Model\"] = file.split(\"/\")[-1].replace(\".csv\",\"\")\n",
    "    dfs.append(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pandas.concat(dfs, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>F1-Score</th>\n",
       "      <th>Recall</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>False-Alarm</th>\n",
       "      <th>Error Rate</th>\n",
       "      <th>Accept Rate</th>\n",
       "      <th>Pass Rate</th>\n",
       "      <th>Model</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>average</td>\n",
       "      <td>0.589711</td>\n",
       "      <td>0.928854</td>\n",
       "      <td>0.431985</td>\n",
       "      <td>0.509745</td>\n",
       "      <td>0.568015</td>\n",
       "      <td>0.490255</td>\n",
       "      <td>0.225134</td>\n",
       "      <td>0.352324</td>\n",
       "      <td>Mistral-7B-Instruct-v0.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>average</td>\n",
       "      <td>0.319527</td>\n",
       "      <td>0.221311</td>\n",
       "      <td>0.574468</td>\n",
       "      <td>0.655172</td>\n",
       "      <td>0.425532</td>\n",
       "      <td>0.344828</td>\n",
       "      <td>0.336879</td>\n",
       "      <td>0.080960</td>\n",
       "      <td>gpt-3.5-turbo-top3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>average</td>\n",
       "      <td>0.540755</td>\n",
       "      <td>0.523077</td>\n",
       "      <td>0.559671</td>\n",
       "      <td>0.653673</td>\n",
       "      <td>0.440329</td>\n",
       "      <td>0.346327</td>\n",
       "      <td>0.305898</td>\n",
       "      <td>0.203898</td>\n",
       "      <td>gpt-4o-ablation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>average</td>\n",
       "      <td>0.535714</td>\n",
       "      <td>0.945378</td>\n",
       "      <td>0.373754</td>\n",
       "      <td>0.415292</td>\n",
       "      <td>0.626246</td>\n",
       "      <td>0.584708</td>\n",
       "      <td>0.187567</td>\n",
       "      <td>0.337331</td>\n",
       "      <td>qwen2-7b-instruct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>average</td>\n",
       "      <td>0.562189</td>\n",
       "      <td>0.773973</td>\n",
       "      <td>0.441406</td>\n",
       "      <td>0.472264</td>\n",
       "      <td>0.558594</td>\n",
       "      <td>0.527736</td>\n",
       "      <td>0.263193</td>\n",
       "      <td>0.338831</td>\n",
       "      <td>proactivellama-8b-plain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>writing</td>\n",
       "      <td>0.453608</td>\n",
       "      <td>0.314286</td>\n",
       "      <td>0.814815</td>\n",
       "      <td>0.736318</td>\n",
       "      <td>0.185185</td>\n",
       "      <td>0.263682</td>\n",
       "      <td>0.470588</td>\n",
       "      <td>0.109453</td>\n",
       "      <td>claude-3-sonnet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>writing</td>\n",
       "      <td>0.557377</td>\n",
       "      <td>0.419753</td>\n",
       "      <td>0.829268</td>\n",
       "      <td>0.731343</td>\n",
       "      <td>0.170732</td>\n",
       "      <td>0.268657</td>\n",
       "      <td>0.439024</td>\n",
       "      <td>0.169154</td>\n",
       "      <td>claude-3-sonnet-20240229-top5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>writing</td>\n",
       "      <td>0.549618</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.720000</td>\n",
       "      <td>0.706468</td>\n",
       "      <td>0.280000</td>\n",
       "      <td>0.293532</td>\n",
       "      <td>0.403974</td>\n",
       "      <td>0.179104</td>\n",
       "      <td>claude-3-sonnet-ablation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>writing</td>\n",
       "      <td>0.700680</td>\n",
       "      <td>0.971698</td>\n",
       "      <td>0.547872</td>\n",
       "      <td>0.562189</td>\n",
       "      <td>0.452128</td>\n",
       "      <td>0.437811</td>\n",
       "      <td>0.321053</td>\n",
       "      <td>0.512438</td>\n",
       "      <td>qwen2-7b-instruct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>writing</td>\n",
       "      <td>0.261905</td>\n",
       "      <td>0.164179</td>\n",
       "      <td>0.647059</td>\n",
       "      <td>0.691542</td>\n",
       "      <td>0.352941</td>\n",
       "      <td>0.308458</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.054726</td>\n",
       "      <td>gpt-3.5-turbo-top3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>184 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Category  F1-Score    Recall  Precision  Accuracy  False-Alarm  \\\n",
       "0    average  0.589711  0.928854   0.431985  0.509745     0.568015   \n",
       "160  average  0.319527  0.221311   0.574468  0.655172     0.425532   \n",
       "32   average  0.540755  0.523077   0.559671  0.653673     0.440329   \n",
       "152  average  0.535714  0.945378   0.373754  0.415292     0.626246   \n",
       "40   average  0.562189  0.773973   0.441406  0.472264     0.558594   \n",
       "..       ...       ...       ...        ...       ...          ...   \n",
       "70   writing  0.453608  0.314286   0.814815  0.736318     0.185185   \n",
       "134  writing  0.557377  0.419753   0.829268  0.731343     0.170732   \n",
       "30   writing  0.549618  0.444444   0.720000  0.706468     0.280000   \n",
       "158  writing  0.700680  0.971698   0.547872  0.562189     0.452128   \n",
       "166  writing  0.261905  0.164179   0.647059  0.691542     0.352941   \n",
       "\n",
       "     Error Rate  Accept Rate  Pass Rate                          Model  \n",
       "0      0.490255     0.225134   0.352324       Mistral-7B-Instruct-v0.3  \n",
       "160    0.344828     0.336879   0.080960             gpt-3.5-turbo-top3  \n",
       "32     0.346327     0.305898   0.203898                gpt-4o-ablation  \n",
       "152    0.584708     0.187567   0.337331              qwen2-7b-instruct  \n",
       "40     0.527736     0.263193   0.338831        proactivellama-8b-plain  \n",
       "..          ...          ...        ...                            ...  \n",
       "70     0.263682     0.470588   0.109453                claude-3-sonnet  \n",
       "134    0.268657     0.439024   0.169154  claude-3-sonnet-20240229-top5  \n",
       "30     0.293532     0.403974   0.179104       claude-3-sonnet-ablation  \n",
       "158    0.437811     0.321053   0.512438              qwen2-7b-instruct  \n",
       "166    0.308458     0.333333   0.054726             gpt-3.5-turbo-top3  \n",
       "\n",
       "[184 rows x 10 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(by=\"Category\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 把model 列提前到第二列\n",
    "cols = list(df.columns)\n",
    "cols.remove(\"Model\")\n",
    "cols.insert(1, \"Model\")\n",
    "df = df[cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Model</th>\n",
       "      <th>F1-Score</th>\n",
       "      <th>Recall</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>False-Alarm</th>\n",
       "      <th>Error Rate</th>\n",
       "      <th>Accept Rate</th>\n",
       "      <th>Pass Rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>average</td>\n",
       "      <td>Mistral-7B-Instruct-v0.3</td>\n",
       "      <td>0.589711</td>\n",
       "      <td>0.928854</td>\n",
       "      <td>0.431985</td>\n",
       "      <td>0.509745</td>\n",
       "      <td>0.568015</td>\n",
       "      <td>0.490255</td>\n",
       "      <td>0.225134</td>\n",
       "      <td>0.352324</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>average</td>\n",
       "      <td>claude-3-sonnet</td>\n",
       "      <td>0.466667</td>\n",
       "      <td>0.348659</td>\n",
       "      <td>0.705426</td>\n",
       "      <td>0.688156</td>\n",
       "      <td>0.294574</td>\n",
       "      <td>0.311844</td>\n",
       "      <td>0.353690</td>\n",
       "      <td>0.136432</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>average</td>\n",
       "      <td>claude-3-sonnet-20240229-ablation</td>\n",
       "      <td>0.527027</td>\n",
       "      <td>0.428571</td>\n",
       "      <td>0.684211</td>\n",
       "      <td>0.685157</td>\n",
       "      <td>0.315789</td>\n",
       "      <td>0.314843</td>\n",
       "      <td>0.320463</td>\n",
       "      <td>0.175412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>average</td>\n",
       "      <td>claude-3-sonnet-20240229-top1</td>\n",
       "      <td>0.231788</td>\n",
       "      <td>0.149573</td>\n",
       "      <td>0.514706</td>\n",
       "      <td>0.652174</td>\n",
       "      <td>0.485294</td>\n",
       "      <td>0.347826</td>\n",
       "      <td>0.514706</td>\n",
       "      <td>0.052474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>average</td>\n",
       "      <td>claude-3-sonnet-20240229-top3</td>\n",
       "      <td>0.446194</td>\n",
       "      <td>0.330739</td>\n",
       "      <td>0.685484</td>\n",
       "      <td>0.683658</td>\n",
       "      <td>0.314516</td>\n",
       "      <td>0.316342</td>\n",
       "      <td>0.360215</td>\n",
       "      <td>0.127436</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>writing</td>\n",
       "      <td>llama3-70b</td>\n",
       "      <td>0.750903</td>\n",
       "      <td>0.936937</td>\n",
       "      <td>0.626506</td>\n",
       "      <td>0.656716</td>\n",
       "      <td>0.373494</td>\n",
       "      <td>0.343284</td>\n",
       "      <td>0.353414</td>\n",
       "      <td>0.517413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>writing</td>\n",
       "      <td>proactivellama-70b-checkpoint</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.361111</td>\n",
       "      <td>0.577778</td>\n",
       "      <td>0.676617</td>\n",
       "      <td>0.422222</td>\n",
       "      <td>0.323383</td>\n",
       "      <td>0.380165</td>\n",
       "      <td>0.129353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td>writing</td>\n",
       "      <td>proactivellama-70b-plain</td>\n",
       "      <td>0.591716</td>\n",
       "      <td>0.549451</td>\n",
       "      <td>0.641026</td>\n",
       "      <td>0.656716</td>\n",
       "      <td>0.358974</td>\n",
       "      <td>0.343284</td>\n",
       "      <td>0.419643</td>\n",
       "      <td>0.248756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>182</th>\n",
       "      <td>writing</td>\n",
       "      <td>proactivellama-8b-plain</td>\n",
       "      <td>0.751773</td>\n",
       "      <td>0.913793</td>\n",
       "      <td>0.638554</td>\n",
       "      <td>0.651741</td>\n",
       "      <td>0.361446</td>\n",
       "      <td>0.348259</td>\n",
       "      <td>0.425311</td>\n",
       "      <td>0.527363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>writing</td>\n",
       "      <td>qwen2-7b-instruct</td>\n",
       "      <td>0.700680</td>\n",
       "      <td>0.971698</td>\n",
       "      <td>0.547872</td>\n",
       "      <td>0.562189</td>\n",
       "      <td>0.452128</td>\n",
       "      <td>0.437811</td>\n",
       "      <td>0.321053</td>\n",
       "      <td>0.512438</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>184 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Category                              Model  F1-Score    Recall  \\\n",
       "0    average           Mistral-7B-Instruct-v0.3  0.589711  0.928854   \n",
       "1    average                    claude-3-sonnet  0.466667  0.348659   \n",
       "2    average  claude-3-sonnet-20240229-ablation  0.527027  0.428571   \n",
       "3    average      claude-3-sonnet-20240229-top1  0.231788  0.149573   \n",
       "4    average      claude-3-sonnet-20240229-top3  0.446194  0.330739   \n",
       "..       ...                                ...       ...       ...   \n",
       "179  writing                         llama3-70b  0.750903  0.936937   \n",
       "180  writing      proactivellama-70b-checkpoint  0.444444  0.361111   \n",
       "181  writing           proactivellama-70b-plain  0.591716  0.549451   \n",
       "182  writing            proactivellama-8b-plain  0.751773  0.913793   \n",
       "183  writing                  qwen2-7b-instruct  0.700680  0.971698   \n",
       "\n",
       "     Precision  Accuracy  False-Alarm  Error Rate  Accept Rate  Pass Rate  \n",
       "0     0.431985  0.509745     0.568015    0.490255     0.225134   0.352324  \n",
       "1     0.705426  0.688156     0.294574    0.311844     0.353690   0.136432  \n",
       "2     0.684211  0.685157     0.315789    0.314843     0.320463   0.175412  \n",
       "3     0.514706  0.652174     0.485294    0.347826     0.514706   0.052474  \n",
       "4     0.685484  0.683658     0.314516    0.316342     0.360215   0.127436  \n",
       "..         ...       ...          ...         ...          ...        ...  \n",
       "179   0.626506  0.656716     0.373494    0.343284     0.353414   0.517413  \n",
       "180   0.577778  0.676617     0.422222    0.323383     0.380165   0.129353  \n",
       "181   0.641026  0.656716     0.358974    0.343284     0.419643   0.248756  \n",
       "182   0.638554  0.651741     0.361446    0.348259     0.425311   0.527363  \n",
       "183   0.547872  0.562189     0.452128    0.437811     0.321053   0.512438  \n",
       "\n",
       "[184 rows x 10 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.sort_values(by=[\"Category\",\"Model\",\"F1-Score\"])\n",
    "df.reset_index(drop=True, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"results_70b.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "xagent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
